## Enlistments Data Paper ##
## Prepared by Kevin Fahey ##
## And Doug Atkinson ##
## And Alex Artiles ##
## And Joana Trenaska ##
## Compiled 2023-01-01 Using R 4.2.2

##### PREAMBLE #####

## Clear environment
rm(list=ls())


## Set Seed 
set.seed(20120630)


## Set Working Directory
# setwd() command changed to your working directory


## Load Packages 
library(foreign)
library(tidyverse)
library(broom)
library(vroom)
library(psych)
library(xtable)
library(maptools)
library(usmap)
library(fixest)
library(dotwhisker)
library(corrplot)
library(RColorBrewer)


## Read in datasets

# Main dataset for most Figures and Tables
dat <- read.csv("2023-01-01 ww2_vols_data.csv")

# Additional datasets
dat_ww2_women <- read.csv("2023-01-01 dat_ww2_women_all.csv")
dat_ww2_men <- read.csv("2023-01-01 dat_ww2_men_all.csv")



##### DESCRIPTIVE STATISTICS #####

## Table B1: Summary Statistics
dat.descriptive <- dat %>%
  select(mfgwages_prop, retwages_prop, wholwage_prop, servwage_prop, 
         unemployed_prop, fgrad_prop, 
         totpop40, urb940_prop, Dem1940, demcong1940, 
         dissimilarity_all, isolation_all, wf_prop, wm_prop,
         negf21_prop, negm21_prop) %>%
  describe(., na.rm = T, skew = F) %>%
  dplyr::select(-vars, -se, -range)
row.names(dat.descriptive) <- c("Manufacturing Wages", "Retail Wages",
                                "Service Wages", "Wholesale Wages", "Unemployment",
                                "County Pct. Female College Grad",
                                "County Population", "Urban Population", 
                                "1940 Roosevelt Vote Share", 
                                "1940 Dem Cong. Candidate Vote Share",
                                "Dissimilarity (Segregation)", "Isolation (Segregation)",
                                "County Pct. White Female", "County Pct. White Male",
                                "County Pct. Black Female", "County Pct. Black Male")
print(xtable(dat.descriptive, caption = "Descriptive Statistics, covariates in World War II Enlistments database. Covariates focusing on volunteer rates in the war emphasized. Data are pooled across years to create this table.", digits = 4), hline.after = NULL)


## Table B2: Coefficients for Female Volunteerism
model_women <- feols(wvol_prop ~ 
                       mfgwages_prop + retwages_prop + 
                       wholwage_prop + servwage_prop +
                       unemployed_prop + fgrad_prop +
                       totpop40 + urb940_prop +
                       Dem1940 + demcong1940 +
                       dissimilarity_all + isolation_all +
                       wf_prop + wm_prop + negf21_prop + negm21_prop, 
                     data = dat, fixef = c("state", "year"),
                     se = "cluster", cluster = "fipscode")

model_women_white <- feols(wvol_white_prop ~ 
                             mfgwages_prop + retwages_prop + 
                             wholwage_prop + servwage_prop +
                             unemployed_prop + fgrad_prop +
                             totpop40 + urb940_prop +
                             Dem1940 + demcong1940 + 
                             dissimilarity_all + isolation_all +
                             wf_prop + wm_prop + negf21_prop + negm21_prop, 
                           data = dat, fixef = c("state", "year"), 
                           se = "cluster", cluster = "fipscode")

model_women_black <- feols(wvol_black_prop ~ 
                             mfgwages_prop + retwages_prop + 
                             wholwage_prop + servwage_prop +
                             unemployed_prop + fgrad_prop +
                             totpop40 + urb940_prop +
                             Dem1940 + demcong1940 +
                             dissimilarity_all + isolation_all +
                             wf_prop + wm_prop + negf21_prop + negm21_prop, 
                           data = dat, fixef = c("state", "year"), 
                           se = "cluster", cluster = "fipscode")

# Output in etable to LaTeX document editor
etable(model_women, model_women_white, model_women_black, 
       cluster = "fipscode", signif.code = c("**" = 0.01, "*" = 0.05, "." = 0.1),
       tex = T)


## Table B2: Coefficients for Female Volunteerism, without IVs
model_no_fgrad <- feols(wvol_prop ~ 
                          mfgwages_prop + retwages_prop + 
                          wholwage_prop + servwage_prop +
                          unemployed_prop +
                          totpop40 + urb940_prop +
                          Dem1940 + demcong1940 +
                          dissimilarity_all + isolation_all +
                          wf_prop + wm_prop + negf21_prop + negm21_prop, 
                        data = dat, fixef = c("state", "year"),
                        se = "cluster", cluster = "fipscode")

model_no_segregation <- feols(wvol_prop ~ 
                                mfgwages_prop + retwages_prop + 
                                wholwage_prop + servwage_prop +
                                unemployed_prop + fgrad_prop + 
                                totpop40 + urb940_prop +
                                Dem1940 + demcong1940 +
                                wf_prop + wm_prop + negf21_prop + negm21_prop, 
                              data = dat, fixef = c("state", "year"),
                              se = "cluster", cluster = "fipscode")

model_no_econ <- feols(wvol_prop ~ 
                         fgrad_prop +
                         totpop40 + urb940_prop +
                         Dem1940 + demcong1940 +
                         dissimilarity_all + isolation_all +
                         wf_prop + wm_prop + negf21_prop + negm21_prop, 
                       data = dat, fixef = c("state", "year"),
                       se = "cluster", cluster = "fipscode")

# Output to LaTeX document editor
# edit to move Female Grad to top row of coefficients
# edit to move racial isolation and segregation to 2nd and 3rd row of coefficients
etable(model_no_fgrad, model_no_segregation, model_no_econ, 
       cluster = "fipscode", signif.code = c("**" = 0.01, "*" = 0.05, "." = 0.1),
       tex = T)


## Figure C1: Standardized Rate of Male Volunteers by County, 1943
dat_map_men <- dat %>%
  dplyr::filter(year == 43) %>%
  mutate(mvol_output = (mvol_prop - mean(mvol_prop, na.rm = T))/sd(mvol_prop, na.rm = T)) %>%
  dplyr::filter(mvol_output <= 3) %>%
  rename(fips = fipscode)
appendix_figure_c1 <- plot_usmap(regions = "counties", data = dat_map_men,
                          values = "mvol_output", labels = FALSE,
                          color = "white", size = 0.015,
                          exclude = c("AK", "HI")) +
  scale_fill_continuous(low = "grey20", high = "grey50") +
  labs(title = "Men Volunteers as Share of Population, 1943",
       fill = "") +
  theme(legend.position = "bottom")
png("appendix_figure_c1.png", res = 120)
print(appendix_figure_c1)
dev.off()


## Figure C2: Correlation Matrix
df_cor_men <- dat %>%
  dplyr::select(mvol_prop, mvol_white_prop, mvol_black_prop,
                mfgwages_prop, retwages_prop,
                wholwage_prop, servwage_prop, unemployed_prop, fgrad_prop, 
                totpop40, urb940_prop, Dem1940, demcong1940, 
                dissimilarity_all, isolation_all, wf_prop, wm_prop,
                negf21_prop, negm21_prop)
colnames(df_cor_men) <- c("Vols", "Vols(W)", "Vols(B)", "MfgWage", "RetWage", 
                          "WholWage", "ServWage", "Unem", "FGrad", "Pop40",
                          "Urban", "DemPres", "DemCong", "Dissim", "Isolat",
                          "WhFem", "WhMal", "BlFem", "BlMal")

# Find correlations
df_correlation_men <- round(cor(df_cor_men, use = "pairwise.complete.obs"), 2)

# Find p-value
cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat<- matrix(NA, n, n)
  diag(p.mat) <- 0
  for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
      tmp <- cor.test(mat[, i], mat[, j], ...)
      p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
    }
  }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}

p.vals <- cor.mtest(df_cor_men)

# And output
appendix_figure_c2 <- corrplot(df_correlation_men, method = "color", type = "lower",
                         addCoef.col = "black",
                         col=brewer.pal(n=8, name="Greys"), tl.srt = 45,
                         tl.col = "black", p.mat = p.vals, sig.level = 0.05,
                         insig = "blank")
dev.copy(png, 'appendix_figure_c2.png', width = 750, height = 750)
dev.off()


## Figure C3: Male Volunteers, 1941-1945
df_summary_men <- dat_ww2_men %>%
  group_by(enlistyear) %>%
  summarise(a_menvolunteers = sum(menvolunteers, na.rm = T),
            b_men_whitevolunteers = sum(men_whitevolunteers, na.rm = T),
            c_men_blackvolunteers = sum(men_blackvolunteers, na.rm = T)) %>%
  pivot_longer(cols = c(a_menvolunteers, b_men_whitevolunteers, 
                        c_men_blackvolunteers), 
               names_to = "type", 
               values_to = "vols") %>%
  mutate(enlistyear = as.character(paste(enlistyear, sep = ""))) %>%
  filter(enlistyear >= 41 & enlistyear <= 45)
appendix_figure_c3 <- ggplot(df_summary_men,
                              aes(x = enlistyear, y = vols, fill = type)) +
  geom_bar(stat = "identity", position = position_dodge(1.05), width = 0.85) +
  geom_text(aes(label = vols, col = type), 
            vjust = -1.25, size = 2.75, 
            position = position_dodge(0.9)) +
  scale_fill_manual(labels = c("All Men", "White Men", "Black Men"),
                    values = c("grey20", "grey40", "grey60")) +
  scale_color_manual(values = c("grey20", "grey40", "grey60")) +
  guides(color = "none") +
  theme_bw() +
  ylim(0, 1000000) +
  labs(x = "Year", y = "Volunteers", fill = "")
png("appendix_figure_c3.png", res = 100)
print(appendix_figure_c3)
dev.off()


## Figure C4: Influences on Male Volunteerism
model_men <- feols(mvol_prop ~ 
                     mfgwages_prop + retwages_prop + 
                     wholwage_prop + servwage_prop +
                     unemployed_prop + fgrad_prop +
                     totpop40 + urb940_prop +
                     Dem1940 + demcong1940 +
                     dissimilarity_all + isolation_all +
                     wf_prop + wm_prop + negf21_prop + negm21_prop, 
                   data = dat, fixef = c("state", "year"),
                   se = "cluster", cluster = "fipscode")

model_men_white <- feols(mvol_white_prop ~ 
                           mfgwages_prop + retwages_prop + 
                           wholwage_prop + servwage_prop +
                           unemployed_prop + fgrad_prop +
                           totpop40 + urb940_prop +
                           Dem1940 + demcong1940 + 
                           dissimilarity_all + isolation_all +
                           wf_prop + wm_prop + negf21_prop + negm21_prop, 
                         data = dat, fixef = c("state", "year"), 
                         se = "cluster", cluster = "fipscode")

model_men_black <- feols(mvol_black_prop ~ 
                           mfgwages_prop + retwages_prop + 
                           wholwage_prop + servwage_prop +
                           unemployed_prop + fgrad_prop +
                           totpop40 + urb940_prop +
                           Dem1940 + demcong1940 +
                           dissimilarity_all + isolation_all +
                           wf_prop + wm_prop + negf21_prop + negm21_prop, 
                         data = dat, fixef = c("state", "year"), 
                         se = "cluster", cluster = "fipscode")

# Tidy
models_men <- rbind(
  tidy(model_men) %>% mutate(model = 1),
  tidy(model_men_white) %>% mutate(model = 2),
  tidy(model_men_black) %>% mutate(model = 3)) %>%
  filter(!grepl("factor", term)) %>%
  mutate(model = ifelse(model == 1, "All", 
                        ifelse(model == 2, "White",
                               ifelse(model == 3, "Black", NA))),
         coef_names = rep(c("Manufacturing Wages", "Retail Wages",
                            "Wholesale Wages", "Service Wages", "Unemployment",
                            "Pct. Female Graduates", "Total Population",
                            "Pct. Urban", "Roosevelt Vote Share", 
                            "Dem Cong. Vote Share", "Racial Dissimilarity",
                            "Racial Isolation", "Pct. Women (W)", "Pct. Men (W)",
                            "Pct. Women (B)", "Pct. Men (B)"), 3)) %>%
  mutate(term = coef_names)

appendix_figure_c4 <- dwplot(x = models_men, dodge_size = 0.5, ci = 0.95,
                            dot_args = list(size = 1),
                            whisker_args = list(size = 1),
                            vline = geom_vline(xintercept = 0.0,
                                               colour = "black",
                                               linetype = 2)) +
  scale_colour_manual(labels = c("Black Men", "White Men", "All Men"),
                      values = c("grey60", "grey40", "grey20")) +
  theme_bw() + xlab("") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  ggtitle("") +
  theme(plot.title = element_text(face = "bold"),
        legend.title = element_blank(),
        legend.position = "bottom")
png("appendix_figure_c4.png", res = 95)
print(appendix_figure_c4)
dev.off()


## Table D1: Coefficients for Female Volunteerism
# Inverse hyperbolic sine transformation
model_women_asinh <- feols(asinh_wvol ~ 
                             asinh_mfgwages + asinh_retwages + 
                             asinh_wholewages + asinh_servwages +
                             asinh_unemployed + asinh_fgrad +
                             asinh_totpop40 + asinh_urb40 +
                             Dem1940 + demcong1940 +
                             dissimilarity_all + isolation_all +
                             asinh_wf + asinh_wm + asinh_negf21 + asinh_negm21, 
                           data = dat, fixef = c("state", "year"),
                           se = "cluster", cluster = "fipscode")

model_women_white_asinh <- feols(asinh_wvol_white ~ 
                                   asinh_mfgwages + asinh_retwages + 
                                   asinh_wholewages + asinh_servwages +
                                   asinh_unemployed + asinh_fgrad +
                                   asinh_totpop40 + asinh_urb40 +
                                   Dem1940 + demcong1940 + 
                                   dissimilarity_all + isolation_all +
                                   asinh_wf + asinh_wm + asinh_negf21 + asinh_negm21, 
                                 data = dat, fixef = c("state", "year"), 
                                 se = "cluster", cluster = "fipscode")

model_women_black_asinh <- feols(asinh_wvol_black ~ 
                                   asinh_mfgwages + asinh_retwages + 
                                   asinh_wholewages + asinh_servwages +
                                   asinh_unemployed + asinh_fgrad +
                                   asinh_totpop40 + asinh_urb40 +
                                   Dem1940 + demcong1940 +
                                   dissimilarity_all + isolation_all +
                                   asinh_wf + asinh_wm + asinh_negf21 + asinh_negm21, 
                                 data = dat, fixef = c("state", "year"), 
                                 se = "cluster", cluster = "fipscode")

# Output into a LaTeX document editor
etable(model_women_asinh, model_women_white_asinh, model_women_black_asinh, 
       cluster = "fipscode", signif.code = c("**" = 0.01, "*" = 0.05, "." = 0.1),
       tex = T)